# Direct outputs eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

# Printing: favour fixed over scientific notation, show up to 16 significant
# digits, and do not truncate long printed output.
options(scipen = 999, digits = 16)
options(max.print = .Machine$integer.max)

# Diagnostics: report source locations with errors and print each warning as
# soon as it is raised.
options(show.error.locations = TRUE, warn = 1)

# Random numbers: fix the generator and the main seed so that any random
# draws are reproducible.
seed <- 818675309L
RNGkind(kind = "L'Ecuyer-CMRG")
set.seed(seed = seed)

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(ggplot2)
library(scales)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table: run on a single thread, and have every printed data.table show
# each column's class (and any keys) above the data.
setDTthreads(1L)
options(datatable.print.keys = TRUE, datatable.print.class = TRUE)

# BEGIN FILE -------------------------------------------------------------------

outcome <- "db_w2_wages"

# Read in all estimates

# Both specifications are saved in the same layout and need exactly the same
# preparation, so the steps live in one helper rather than in two copy-pasted
# stanzas that could drift apart.
#
# read_cohort_weighted_att(path)
#   path: location of an .rds file whose first element is a table with the
#         columns ref_event_time, ref_onset_time, model, rn, estimate and
#         cluster_se.
#   Returns a data.table with the columns ref_event_time, estimate and
#   cluster_se, ordered by ref_event_time. It holds the "Cohort-Weighted",
#   "reduced_form", "att" rows with event times -7 through 5 (inclusive), plus
#   one added row for the omitted event time (-2) whose estimate and standard
#   error are both 0.
#   Stops with an error if no row passes the filter; otherwise the figure
#   would silently consist of the added zero row alone.
read_cohort_weighted_att <- function(path) {
    saved_estimates <- readRDS(path)
    coefs <- setDT(saved_estimates[[1]])

    coefs <-
        coefs[
            between(ref_event_time, -7L, 5L, incbounds = TRUE) &
            ref_onset_time == "Cohort-Weighted" &
            model == "reduced_form" &
            rn == "att",
            .(
                ref_event_time,
                estimate,
                cluster_se
            )
        ]

    if (nrow(coefs) == 0L) {
        stop(
            sprintf(
                "No Cohort-Weighted reduced_form att estimates found in %s",
                path
            ),
            call. = FALSE
        )
    }

    # Introduce an omitted_event_time row
    coefs <-
        rbindlist(
            list(
                coefs,
                data.table(
                    ref_event_time = -2L,
                    estimate = 0,
                    cluster_se = 0
                )
            ),
            use.names = TRUE
        )
    setorderv(coefs, "ref_event_time")

    coefs
}

did_laterwinners_coefs <-
    read_cohort_weighted_att(
        sprintf("~/estimation-output/event_study_estimates_%s.rds", outcome)
    )

did_ipw_laterwinners_coefs <-
    read_cohort_weighted_att(
        sprintf("~/estimation-output/event_study_estimates_%s_ipw.rds", outcome)
    )

# The helper is only needed for the two reads above; remove it so the
# workspace holds the same objects as before.
rm(read_cohort_weighted_att)

# Tag each specification with its legend label, then stack the two tables

did_laterwinners_coefs[, `:=`(estimate_label = "Age Dummies")]
did_ipw_laterwinners_coefs[
    ,
    `:=`(estimate_label = "Non-Parametric Age Controls")
]

two_specs <-
    rbindlist(
        l = list(did_laterwinners_coefs, did_ipw_laterwinners_coefs),
        use.names = TRUE
    )

# Store the label as a factor so the level order is fixed explicitly:
# "Age Dummies" first, "Non-Parametric Age Controls" second.
two_specs[
    ,
    `:=`(
        estimate_label = factor(
            estimate_label,
            levels = c("Age Dummies", "Non-Parametric Age Controls")
        )
    )
]

# The plotting code refers to the event-time column as event_time
setnames(two_specs, old = "ref_event_time", new = "event_time")

# Appendix Figure B.3 - preferred regression versus IPW estimates.
# One line per specification, each with a shaded band of 1.64 clustered
# standard errors on either side of the estimate.
# NOTE(review): 1.64 looks like a rounded 90% normal critical value
# (qnorm(0.95) is about 1.645) - confirm the intended confidence level.

figure_b_3 <-
    ggplot(
        data = two_specs[between(event_time, -7L, 5L, incbounds = TRUE)],
        mapping = aes(
            x = event_time,
            y = estimate,
            color = factor(estimate_label)
        )
    ) +
    # Layers: the line is drawn first, the band on top of it
    geom_line() +
    geom_ribbon(
        mapping = aes(
            ymin = estimate - 1.64 * cluster_se,
            ymax = estimate + 1.64 * cluster_se
        ),
        alpha = 0.2,
        linetype = 0, # no outline around the band
        show.legend = FALSE
    ) +
    # Scales and coordinates: one x break per event time, about ten y breaks,
    # and a y view limited to [-5000, 500] without dropping any data
    scale_x_continuous(
        breaks = -7L:5L,
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(breaks = breaks_extended(n = 10)) +
    scale_color_viridis_d() +
    coord_cartesian(ylim = c(-5000, 500)) +
    # Labels and legend layout
    labs(x = "Event Time (ℓ)", y = "Event Study Estimate (2016 USD)") +
    guides(color = guide_legend(ncol = 1, byrow = TRUE)) +
    # Theme: start from theme_bw, then adjust the grid and place an untitled
    # legend inside the panel near the bottom-left corner
    theme_bw(base_size = 13) +
    theme(
        panel.grid.minor = element_blank(),
        strip.text.x = element_blank(),
        strip.background = element_blank(),
        legend.title = element_blank(),
        legend.position = c(0.2175, 0.0825),
        legend.background = element_rect(fill = "white", color = "grey"),
        legend.key = element_rect(fill = NA),
        legend.key.height = unit(4, "mm"),
        legend.key.width = unit(4, "mm"),
        legend.spacing.y = unit(2, "mm"),
        legend.margin = margin(t = 0, r = 0.1, b = 0.15, l = 0.1, unit = "cm")
    )

# Write the figure to disk twice, at the same 6 x 4 size and 600 dpi:
# once as PNG (no device given, so ggsave picks it from the file extension)
# and once as TIFF (device named explicitly).
ggsave(
    filename = "~/paper/figures/figure-B.3.png",
    plot = figure_b_3,
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    filename = "~/paper/figures/figure-B.3.tif",
    plot = figure_b_3,
    device = "tiff",
    width = 6,
    height = 4,
    dpi = 600
)

# Housekeeping: overwrite each working object with NULL, then delete the
# bindings themselves.
outcome <-
    did_laterwinners_coefs <-
    did_ipw_laterwinners_coefs <-
    two_specs <-
    figure_b_3 <-
    NULL

rm(
    list = c(
        "outcome",
        "did_laterwinners_coefs",
        "did_ipw_laterwinners_coefs",
        "two_specs",
        "figure_b_3"
    )
)